import requests as req
from pyquery import PyQuery as pq
import re
import time
import xlwt
import csv
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from pyquery import PyQuery as pq
import time
import eventlet


class ZhiHuCrawler:
    """Scrape the answers of a Zhihu question through a Selenium-driven Chrome.

    The instance owns one browser at a time in ``self.driver``. ``login``
    drives the browser created by ``__init__``; ``get_answer`` replaces it
    with a fresh image-blocking browser (closing the previous one).
    """

    def __init__(self, max_page, sleep_seconds, is_show=False):
        """Store the crawl limits and start a Chrome browser.

        Args:
            max_page: Number of scroll "pages" ``get_answer`` iterates over.
            sleep_seconds: Pause, in seconds, after each content refresh in
                ``get_answer``.
            is_show: When true a regular Chrome is started; otherwise Chrome
                is started with ``--headless`` and ``--disable-gpu``.
        """
        self.max_page = max_page
        self.sleep_seconds = sleep_seconds
        if is_show:
            self.driver = webdriver.Chrome()
        else:
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            self.driver = webdriver.Chrome(chrome_options=chrome_options)

    @staticmethod
    def _parse_like_count(text):
        """Return the first number found in ``text`` as a digit string.

        Thousands separators are dropped (``'1,234 ...'`` -> ``'1234'``).
        An empty string is returned when ``text`` is empty or holds no digit,
        instead of raising ``IndexError``.
        """
        match = re.search(r'\d[\d,]*', text or '')
        if match is None:
            return ''
        return match.group(0).replace(',', '')

    def get_answer(self, obj_id):
        """Collect the answers of one question page.

        Args:
            obj_id: Numeric question id (an int or a string of digits).

        Returns:
            A list of ``[user, autograph, like, answer]`` rows, one per
            ``.List-item`` element processed. ``like`` is a digit string, or
            ``''`` when no count could be read.
        """
        chrome_options = webdriver.ChromeOptions()
        # Content-setting value 2 is meant to block images so pages load
        # faster; the stylesheet key is passed through unchanged.
        prefs = {"profile.managed_default_content_settings.images": 2, 'permissions.default.stylesheet': 2}
        chrome_options.add_experimental_option("prefs", prefs)
        new_driver = webdriver.Chrome(chrome_options=chrome_options)

        # Swap in the new browser, then close the one held so far so that
        # repeated calls do not leave orphaned Chrome processes behind.
        # NOTE(review): the fresh browser does not share the session of the
        # browser used by login(), and it ignores the is_show choice made in
        # __init__ -- confirm whether reusing self.driver is wanted instead.
        previous_driver = getattr(self, 'driver', None)
        self.driver = new_driver
        if previous_driver is not None:
            previous_driver.quit()

        # Pause between refreshes (seconds).
        sleep_seconds = self.sleep_seconds
        data_list = []
        # int() lets callers pass the id as a digit string as well as an int.
        base_url = 'https://www.zhihu.com/question/%d' % int(obj_id)
        self.driver.get(base_url)
        print('url: %s' % base_url)
        doc = pq(self.driver.page_source)
        # ``n+0`` matches every child index, i.e. all list items present.
        content = doc('.List-item:nth-child(n+' + str(0) + ')').items()
        # Total number of refreshes so far; never reset between pages.
        count = 0
        for i in range(self.max_page):
            self.driver.execute_script('window.scrollTo(0, document.body.scrollHeight+10000000)')
            print('获取第 %d 页' % (i + 1))
            print('下拉列表起始索引：%s' % str(len(data_list) + 1))
            for item in content:
                # Author block of the answer.
                user_info = item('.AuthorInfo-content')
                # Author name.
                user = user_info('.Popover').text()
                # Author signature / badge text.
                autograph = user_info('.AuthorInfo-badgeText').text()
                # Answer body.
                answer = item('.RichContent-inner').text()
                # Up-vote count, reduced to its digits.
                like = self._parse_like_count(item('.AnswerItem-extraInfo').text())
                data_list.append([user, autograph, like, answer])
                print([user, autograph, like, answer])
            print('当前批次已经获取到的数据量 : %d' % len(data_list))

            # ``content`` is a generator. ``next`` both tests it and consumes
            # one element. After a refresh the selector starts at child index
            # len(data_list), so the element consumed here is presumably the
            # last answer already stored (assumes answers are consecutive
            # children of one parent -- TODO confirm against the live page).
            while not next(content, None):
                self.driver.execute_script('window.scrollTo(0, document.body.scrollHeight+10000000)')
                doc = pq(self.driver.page_source)
                content = doc('.List-item:nth-child(n+' + str(len(data_list)) + ')').items()
                count = count + 1
                # Hard cap on refreshes for the whole call.
                if count > 100:
                    return data_list
                time.sleep(sleep_seconds)
        return data_list

    @staticmethod
    def save_to_csv(data_list, path, save_type='w'):
        """Write ``data_list`` rows to a CSV file.

        Args:
            data_list: Iterable of rows (each an iterable of cell values).
            path: Destination file path.
            save_type: ``open`` mode, e.g. ``'w'`` to overwrite or ``'a'`` to
                append.
        """
        # utf-8-sig writes a BOM; newline='' leaves line endings to csv.
        with open(path, save_type, encoding='utf-8-sig', newline='') as f:
            writer = csv.writer(f)
            writer.writerows(data_list)
        print('文件写入成功')

    def login(self):
        """Open the Zhihu home page and wait for a manual QR-code login.

        Clicks the QR-code tab of the sign-in form, then waits up to 300
        seconds for the profile avatar to appear. On timeout the browser is
        closed and the process exits with status 0 (status kept as before).

        Raises:
            SystemExit: When the avatar does not appear within the timeout.
        """
        # Imported locally so the module's import block stays untouched.
        from selenium.common.exceptions import TimeoutException

        self.driver.get('https://www.zhihu.com/')

        self.driver.find_element_by_css_selector(
            '#root > div > main > div > div > div.Card.SignContainer-content > div > form > div.SignFlow-tabs > div.SignFlow-qrcodeTab > svg > g > image').click()
        try:
            # until() raises TimeoutException when the avatar never shows up;
            # that is the actual "login failed" signal.
            WebDriverWait(self.driver, 300).until(lambda x: x.find_element_by_class_name("AppHeader-profileAvatar"))
        except TimeoutException:
            print('登录失败')
            self.driver.quit()
            raise SystemExit(0)
        print('登录成功')
        time.sleep(3)
